Setup
(hide)
Click on tabs to display additional information.
Libraries
# Set seed for reproducibility
set.seed(42)
# Load required libraries for DESeq2 analysis and visualization
library(DESeq2)
library(SummarizedExperiment)
library(ggplot2)
library(pheatmap)
library(RColorBrewer)
library(ggrepel)
library(tidyverse)
library(VennDiagram)
library(gridExtra)
library(BiocParallel)
library(gt)
library(broom)
library(car)
library(emmeans)
library(multcomp)
library(MASS)
library(rstatix)
library(coin)
Plotting
# Define treatment order and color palette for plotting (original naming convention)
treatment_order_plot <- c(
"A- T- P-", # Control
"A- T- P+", # Parasite
"A+ T- P-", # Antibiotics
"A+ T- P+", # Antibiotics_Parasite
"A- T+ P-", # Temperature
"A- T+ P+", # Temperature_Parasite
"A+ T+ P-", # Antibiotics_Temperature
"A+ T+ P+" # Antibiotics_Temperature_Parasite
)
# Custom color palette matching treatment order
treatment_colors <- c(
"#1B9E77", # A- T- P- (Control)
"#D95F02", # A- T- P+ (Parasite)
"#7570B3", # A+ T- P- (Antibiotics)
"#E7298A", # A+ T- P+ (Antibiotics_Parasite)
"#66A61E", # A- T+ P- (Temperature)
"#E6AB02", # A- T+ P+ (Temperature_Parasite)
"#A6761D", # A+ T+ P- (Antibiotics_Temperature)
"#666666" # A+ T+ P+ (Antibiotics_Temperature_Parasite)
)
# Create named vector for color scale
treatment_color_scale <- setNames(treatment_colors, treatment_order_plot)
# Define treatment order for DESeq2 (keep original DESeq2-compatible names)
treatment_order_deseq <- c(
"A_minus_T_minus_P_minus", # Control
"A_minus_T_minus_P_plus", # Parasite
"A_plus_T_minus_P_minus", # Antibiotics
"A_plus_T_minus_P_plus", # Antibiotics_Parasite
"A_minus_T_plus_P_minus", # Temperature
"A_minus_T_plus_P_plus", # Temperature_Parasite
"A_plus_T_plus_P_minus", # Antibiotics_Temperature
"A_plus_T_plus_P_plus" # Antibiotics_Temperature_Parasite
)
# Create mapping between DESeq2 names and plotting names
treatment_mapping <- setNames(treatment_order_plot, treatment_order_deseq)
# Mapping from DESeq2 contrast names to plotting-friendly names
contrast_to_plotname <- c(
"A_minus_T_minus_P_minus_vs_A_minus_T_minus_P_minus" = "A- T- P-",
"A_minus_T_minus_P_plus_vs_A_minus_T_minus_P_minus" = "A- T- P+",
"A_plus_T_minus_P_minus_vs_A_minus_T_minus_P_minus" = "A+ T- P-",
"A_plus_T_minus_P_plus_vs_A_minus_T_minus_P_minus" = "A+ T- P+",
"A_minus_T_plus_P_minus_vs_A_minus_T_minus_P_minus" = "A- T+ P-",
"A_minus_T_plus_P_plus_vs_A_minus_T_minus_P_minus" = "A- T+ P+",
"A_plus_T_plus_P_minus_vs_A_minus_T_minus_P_minus" = "A+ T+ P-",
"A_plus_T_plus_P_plus_vs_A_minus_T_minus_P_minus" = "A+ T+ P+"
)
Functions
# Function to convert DESeq2 treatment names to plotting names
convert_treatment_names <- function(deseq_names) {
return(treatment_mapping[deseq_names])
}
# Function to monitor system resources during analysis
monitor_performance <- function() {
# Get memory usage
mem_usage <- gc()
# Get CPU usage (if possible)
cpu_info <- tryCatch({
if (Sys.info()["sysname"] == "Darwin") {
# macOS
cpu_cmd <- "top -l 1 -n 0 | grep 'CPU usage'"
cpu_output <- system(cpu_cmd, intern = TRUE)
cpu_output
} else if (Sys.info()["sysname"] == "Linux") {
# Linux
cpu_cmd <- "top -bn1 | grep 'Cpu(s)'"
cpu_output <- system(cpu_cmd, intern = TRUE)
cpu_output
} else {
"CPU monitoring not available on this system"
}
}, error = function(e) {
"CPU monitoring not available"
})
cat("=== Performance Monitor ===\n")
cat("Memory usage:\n")
print(mem_usage)
cat("CPU info:\n")
cat(cpu_info, "\n")
cat("Active parallel workers:", bp_param$workers, "\n")
cat("========================\n")
}
# Function to optimize memory usage
optimize_memory <- function() {
cat("Optimizing memory usage...\n")
# Force garbage collection
gc(verbose = FALSE)
# Clear unnecessary objects
if (exists("temp_objects")) {
rm(temp_objects)
}
cat("Memory optimization complete\n")
}
# Function to save DESeq2 results
save_deseq_results <- function(results_list, analysis_name) {
# Create results directory if it doesn't exist
results_dir <- here::here("Code", "Analysis", "DiffExpGene", "Saved_Results")
dir.create(results_dir, recursive = TRUE, showWarnings = FALSE)
# Save the results list
results_file <- file.path(results_dir, paste0(analysis_name, "_deseq_results.rds"))
saveRDS(results_list, results_file)
cat("Results saved to:", results_file, "\n")
}
# Function to load DESeq2 results
load_deseq_results <- function(analysis_name) {
results_dir <- here::here("Code", "Analysis", "DiffExpGene", "Saved_Results")
results_file <- file.path(results_dir, paste0(analysis_name, "_deseq_results.rds"))
if (file.exists(results_file)) {
cat("Loading existing results for:", analysis_name, "\n")
return(readRDS(results_file))
} else {
cat("No existing results found for:", analysis_name, "\n")
return(NULL)
}
}
# Function to check if results exist
results_exist <- function(analysis_name) {
results_dir <- here::here("Code", "Analysis", "DiffExpGene", "Saved_Results")
results_file <- file.path(results_dir, paste0(analysis_name, "_deseq_results.rds"))
return(file.exists(results_file))
}
# Function to clean up saved results
cleanup_saved_results <- function(analysis_names = NULL) {
results_dir <- here::here("Code", "Analysis", "DiffExpGene", "Saved_Results")
if (is.null(analysis_names)) {
# Remove all saved results
if (dir.exists(results_dir)) {
unlink(results_dir, recursive = TRUE)
cat("All saved results removed\n")
}
} else {
# Remove specific analysis results
for (analysis_name in analysis_names) {
results_file <- file.path(results_dir, paste0(analysis_name, "_deseq_results.rds"))
if (file.exists(results_file)) {
file.remove(results_file)
cat("Removed saved results for:", analysis_name, "\n")
}
}
}
}
# Function to run DESeq2 analysis for a single comparison
run_deseq_comparison <- function(dds, contrast_name, contrast_vector) {
cat("Running analysis for:", contrast_name, "\n")
# Set seed for reproducibility within parallel processes
set.seed(42)
# Optimize DESeq2 settings for speed
# Use faster estimation method for large datasets
if (nrow(dds) > 10000) {
# For large datasets, use faster estimation
dds_comparison <- DESeq(dds,
parallel = TRUE,
fitType = "local", # Faster than "parametric"
betaPrior = FALSE, # Disable beta prior for speed
quiet = FALSE)
} else {
# For smaller datasets, use standard settings
dds_comparison <- DESeq(dds,
parallel = TRUE,
quiet = FALSE)
}
# Get results with optimized settings
res_comparison <- results(dds_comparison,
contrast = contrast_vector,
parallel = TRUE,
alpha = 0.05) # Set significance threshold
# Add gene names if available
if ("gene_name" %in% names(mcols(dds_comparison))) {
res_comparison$gene_name <- mcols(dds_comparison)$gene_name
}
# Clean up memory
rm(dds_comparison)
gc()
return(res_comparison)
}
# Function to run all comparisons for a specific analysis with enhanced parallelization
run_analysis_set <- function(analysis_name, analysis_config, force_recompute = FALSE) {
cat("\n=== Running", analysis_name, "===\n")
cat("Description:", analysis_config$description, "\n")
# Check if results already exist and force_recompute is FALSE
if (!force_recompute && results_exist(analysis_name)) {
cat("Loading existing results for", analysis_name, "\n")
return(load_deseq_results(analysis_name))
}
# Create a copy of the dataset for this analysis
dds_analysis <- dds_filtered
# Update design if needed
design(dds_analysis) <- analysis_config$design
# Pre-estimate size factors and dispersions for better parallelization
cat("Pre-estimating size factors and dispersions...\n")
dds_analysis <- estimateSizeFactors(dds_analysis)
dds_analysis <- estimateDispersions(dds_analysis, parallel = TRUE)
# Run all contrasts for this analysis
results_list <- list()
# Use parallel processing for multiple contrasts
if (length(analysis_config$contrasts) > 1) {
cat("Running", length(analysis_config$contrasts), "contrasts in parallel...\n")
# Create a function for parallel execution
run_single_contrast <- function(contrast_info) {
contrast_name <- contrast_info$name
contrast_vector <- contrast_info$vector
# Check if both treatment levels exist in the data
if (all(contrast_vector[2:3] %in% levels(colData(dds_analysis)$Treatment))) {
return(list(name = contrast_name, result = run_deseq_comparison(dds_analysis, contrast_name, contrast_vector)))
} else {
cat("Warning: Contrast", contrast_name, "skipped - treatment levels not found in data\n")
return(list(name = contrast_name, result = NULL))
}
}
# Prepare contrast information for parallel processing
contrast_info_list <- lapply(names(analysis_config$contrasts), function(name) {
list(name = name, vector = analysis_config$contrasts[[name]])
})
# Run contrasts in parallel
parallel_results <- BiocParallel::bplapply(contrast_info_list, run_single_contrast)
# Organize results
for (result in parallel_results) {
if (!is.null(result$result)) {
results_list[[result$name]] <- result$result
}
}
} else {
# For single contrast, run directly
for (contrast_name in names(analysis_config$contrasts)) {
contrast_vector <- analysis_config$contrasts[[contrast_name]]
# Check if both treatment levels exist in the data
if (all(contrast_vector[2:3] %in% levels(colData(dds_analysis)$Treatment))) {
results_list[[contrast_name]] <- run_deseq_comparison(dds_analysis, contrast_name, contrast_vector)
} else {
cat("Warning: Contrast", contrast_name, "skipped - treatment levels not found in data\n")
}
}
}
# Save the results
save_deseq_results(results_list, analysis_name)
# Clean up memory
rm(dds_analysis)
gc()
return(results_list)
}
# Function to format p-values for gt tables
fmt_pvalues <- function(gt_tbl, columns, threshold = 0.001, decimals = 3) {
gt_tbl |>
gt::fmt_number(columns = all_of(columns), decimals = decimals) |>
gt::fmt(
columns = all_of(columns),
rows = Reduce(`|`, lapply(columns, function(col) gt_tbl$`_data`[[col]] < threshold)),
fns = function(x) rep(paste0("<", threshold), length(x))
)
}
# Function to save results for each analysis
save_analysis_results <- function(analysis_name, results_list) {
cat("Saving results for:", analysis_name, "\n")
# Create subdirectory for this analysis
analysis_dir <- file.path(output_dir, analysis_name)
dir.create(analysis_dir, recursive = TRUE, showWarnings = FALSE)
# Save each contrast result
for (contrast_name in names(results_list)) {
res <- results_list[[contrast_name]]
# Convert to data frame
res_df <- as.data.frame(res)
# Add gene names if available
if ("gene_name" %in% names(res_df)) {
res_df <- res_df %>%
dplyr::select(gene_name, everything()) %>%
dplyr::rename(gene = gene_name)
}
# Save full results
full_filename <- file.path(analysis_dir, paste0(contrast_name, "_full_results.tsv"))
write_tsv(res_df, full_filename)
# Save significant results (padj < 0.05)
sig_df <- res_df %>%
dplyr::filter(padj < 0.05) %>%
dplyr::arrange(padj)
if (nrow(sig_df) > 0) {
sig_filename <- file.path(analysis_dir, paste0(contrast_name, "_significant_results.tsv"))
write_tsv(sig_df, sig_filename)
# Create summary table
summary_table <- data.frame(
Contrast = contrast_name,
Total_Genes = nrow(res_df),
Significant_Genes = nrow(sig_df),
Upregulated = sum(sig_df$log2FoldChange > 0, na.rm = TRUE),
Downregulated = sum(sig_df$log2FoldChange < 0, na.rm = TRUE),
stringsAsFactors = FALSE
)
# Save summary
summary_filename <- file.path(analysis_dir, paste0(contrast_name, "_summary.tsv"))
write_tsv(summary_table, summary_filename)
}
}
# Create combined summary for the analysis
all_summaries <- list()
for (contrast_name in names(results_list)) {
res <- results_list[[contrast_name]]
res_df <- as.data.frame(res)
sig_df <- res_df %>% dplyr::filter(padj < 0.05)
all_summaries[[contrast_name]] <- data.frame(
Contrast = contrast_name,
Total_Genes = nrow(res_df),
Significant_Genes = nrow(sig_df),
Upregulated = sum(sig_df$log2FoldChange > 0, na.rm = TRUE),
Downregulated = sum(sig_df$log2FoldChange < 0, na.rm = TRUE),
stringsAsFactors = FALSE
)
}
combined_summary <- bind_rows(all_summaries)
combined_filename <- file.path(analysis_dir, paste0(analysis_name, "_combined_summary.tsv"))
write_tsv(combined_summary, combined_filename)
# Display summary table
cat("\nSummary for", analysis_name, ":\n")
print(combined_summary %>% gt::gt())
}
Import Data
# Read in metadata sheet
Metadata <- readxl::read_excel(here::here("Data", "Transcriptomics", "Data", "ROL_MajorExperiment__MetadataSheet__Corrected_05092025.xlsx"))
# Read in SummarizedExperiment object
se <- readRDS(here::here("Data", "Transcriptomics", "Results", "rnaseq", "051525", "salmon", "salmon.merged.gene_counts_length_scaled.SummarizedExperiment.rds"))
# Print basic information about the dataset
cat("Number of genes:", nrow(se), "\n")
## Number of genes: 39447
cat("Number of samples:", ncol(se), "\n")
## Number of samples: 72
cat("Sample names:", colnames(se), "\n")
## Sample names: TS047_RoL_RNA_110 TS047_RoL_RNA_112 TS047_RoL_RNA_114 TS047_RoL_RNA_116 TS047_RoL_RNA_119 TS047_RoL_RNA_136 TS047_RoL_RNA_142 TS047_RoL_RNA_145 TS047_RoL_RNA_146 TS047_RoL_RNA_16 TS047_RoL_RNA_170 TS047_RoL_RNA_175 TS047_RoL_RNA_177 TS047_RoL_RNA_20 TS047_RoL_RNA_200 TS047_RoL_RNA_204 TS047_RoL_RNA_229 TS047_RoL_RNA_231 TS047_RoL_RNA_258 TS047_RoL_RNA_262 TS047_RoL_RNA_286 TS047_RoL_RNA_288 TS047_RoL_RNA_289 TS047_RoL_RNA_291 TS047_RoL_RNA_317 TS047_RoL_RNA_318 TS047_RoL_RNA_319 TS047_RoL_RNA_322 TS047_RoL_RNA_326 TS047_RoL_RNA_328 TS047_RoL_RNA_347 TS047_RoL_RNA_355 TS047_RoL_RNA_377 TS047_RoL_RNA_381 TS047_RoL_RNA_407 TS047_RoL_RNA_412 TS047_RoL_RNA_439 TS047_RoL_RNA_442 TS047_RoL_RNA_46 TS047_RoL_RNA_468 TS047_RoL_RNA_469 TS047_RoL_RNA_477 TS047_RoL_RNA_478 TS047_RoL_RNA_498 TS047_RoL_RNA_500 TS047_RoL_RNA_502 TS047_RoL_RNA_506 TS047_RoL_RNA_51 TS047_RoL_RNA_526 TS047_RoL_RNA_528 TS047_RoL_RNA_530 TS047_RoL_RNA_532 TS047_RoL_RNA_559 TS047_RoL_RNA_561 TS047_RoL_RNA_562 TS047_RoL_RNA_586 TS047_RoL_RNA_616 TS047_RoL_RNA_618 TS047_RoL_RNA_646 TS047_RoL_RNA_651 TS047_RoL_RNA_652 TS047_RoL_RNA_655 TS047_RoL_RNA_676 TS047_RoL_RNA_677 TS047_RoL_RNA_678 TS047_RoL_RNA_679 TS047_RoL_RNA_683 TS047_RoL_RNA_707 TS047_RoL_RNA_708 TS047_RoL_RNA_710 TS047_RoL_RNA_79 TS047_RoL_RNA_85
cat("Available assays:", assayNames(se), "\n")
## Available assays: counts abundance
cat("Column data (metadata):", names(colData(se)), "\n")
## Column data (metadata): sample fastq_1 fastq_2 strandedness
Data Preprocessing
# Clean sample names by removing "TS047_RoL_RNA_" prefix
clean_names <- gsub("TS047_RoL_RNA_", "", colnames(se))
# Update column names in the SummarizedExperiment object
colnames(se) <- clean_names
# Create a data frame for the metadata columns we want to add
metadata_cols <- c("Time", "Antibiotics", "Temperature",
"Pathogen", "History", "Length.mm", "Weight.g", "Total.Worm.Count")
# Initialize a data frame to store the metadata
sample_metadata <- data.frame(
Sample_Name = clean_names,
stringsAsFactors = FALSE
)
rownames(sample_metadata) <- clean_names
# Add each metadata column
for (col in metadata_cols) {
values <- rep(NA, length(clean_names))
for (i in seq_along(clean_names)) {
matches <- which(Metadata$Sample_Name == clean_names[i])
if (length(matches) == 1) {
values[i] <- Metadata[matches, col, drop = TRUE]
}
}
sample_metadata[[col]] <- values
}
# Add the metadata to the SummarizedExperiment object
colData(se) <- DataFrame(sample_metadata)
cat("Metadata added to SummarizedExperiment object\n")
## Metadata added to SummarizedExperiment object
Create DESeq2 Dataset
# Remove samples with NA values in treatment columns
se_filtered <- se[, !is.na(colData(se)$Antibiotics) & !is.na(colData(se)$Temperature) & !is.na(colData(se)$Pathogen)]
cat("Number of samples after removing NAs:", ncol(se_filtered), "\n")
## Number of samples after removing NAs: 72
# Create treatment combinations using DESeq2-compatible format
colData(se_filtered)$Treatment <- paste0(
ifelse(colData(se_filtered)$Antibiotics == 1, "A_plus", "A_minus"), "_",
ifelse(colData(se_filtered)$Temperature == 1, "T_plus", "T_minus"), "_",
ifelse(colData(se_filtered)$Pathogen == 1, "P_plus", "P_minus")
)
# Create plotting-friendly treatment names
colData(se_filtered)$Treatment_Plot <- paste0(
ifelse(colData(se_filtered)$Antibiotics == 1, "A+", "A-"), " ",
ifelse(colData(se_filtered)$Temperature == 1, "T+", "T-"), " ",
ifelse(colData(se_filtered)$Pathogen == 1, "P+", "P-")
)
# Convert treatment to factor with specified order (DESeq2 names)
colData(se_filtered)$Treatment <- factor(colData(se_filtered)$Treatment, levels = treatment_order_deseq)
# Convert plotting treatment to factor with specified order
colData(se_filtered)$Treatment_Plot <- factor(colData(se_filtered)$Treatment_Plot, levels = treatment_order_plot)
# Print the distribution of treatment combinations
cat("\nTreatment combination distribution:\n")
##
## Treatment combination distribution:
print(table(colData(se_filtered)$Treatment))
##
## A_minus_T_minus_P_minus A_minus_T_minus_P_plus A_plus_T_minus_P_minus
## 6 12 6
## A_plus_T_minus_P_plus A_minus_T_plus_P_minus A_minus_T_plus_P_plus
## 12 6 12
## A_plus_T_plus_P_minus A_plus_T_plus_P_plus
## 6 12
print(table(colData(se_filtered)$Treatment_Plot))
##
## A- T- P- A- T- P+ A+ T- P- A+ T- P+ A- T+ P- A- T+ P+ A+ T+ P- A+ T+ P+
## 6 12 6 12 6 12 6 12
# Clean other factor levels
colData(se_filtered)$Time <- factor(colData(se_filtered)$Time)
colData(se_filtered)$Antibiotics <- factor(colData(se_filtered)$Antibiotics)
colData(se_filtered)$Temperature <- factor(colData(se_filtered)$Temperature)
colData(se_filtered)$Pathogen <- factor(colData(se_filtered)$Pathogen)
# Create initial design formula
design_formula <- ~ Treatment
# We need to use raw counts for DESeq2, not length-scaled counts
if (!"counts" %in% assayNames(se_filtered)) {
# Read in the raw counts file
raw_counts <- read.table(here::here("Data", "Transcriptomics", "Results", "rnaseq", "051525", "salmon", "salmon.merged.gene_counts_length_scaled.tsv"),
header = TRUE, row.names = 1)
# Convert to matrix and ensure it's numeric
counts_matrix <- as.matrix(raw_counts)
mode(counts_matrix) <- "numeric"
# Round to nearest integer (DESeq2 requires integer counts)
counts_matrix <- round(counts_matrix)
# Ensure the order of samples matches the SummarizedExperiment
counts_matrix <- counts_matrix[, colnames(se_filtered)]
} else {
# If counts are available, use them
counts_matrix <- as.matrix(assays(se_filtered)$counts)
mode(counts_matrix) <- "numeric"
counts_matrix <- round(counts_matrix)
}
# Create initial DESeq2 dataset
dds_initial <- DESeqDataSetFromMatrix(
countData = counts_matrix,
colData = colData(se_filtered),
design = design_formula
)
## converting counts to integer mode
# Pre-filtering: remove low abundance transcripts
cpm <- counts(dds_initial) / colSums(counts(dds_initial)) * 1e6
# Keep genes that have at least 2 CPM in at least 3 samples
keep <- rowSums(cpm >= 2) >= 3
dds_filtered <- dds_initial[keep,]
# Print filtering statistics
cat("Number of genes before filtering:", nrow(counts_matrix), "\n")
## Number of genes before filtering: 39447
cat("Number of genes after filtering:", nrow(dds_filtered), "\n")
## Number of genes after filtering: 20672
cat("Number of genes removed:", nrow(counts_matrix) - nrow(dds_filtered), "\n")
## Number of genes removed: 18775
cat("Percentage of genes kept:", round(nrow(dds_filtered)/nrow(counts_matrix)*100, 2), "%\n")
## Percentage of genes kept: 52.4 %
# Transform counts for visualization
rld <- vst(dds_filtered, blind=FALSE)
# Estimate size factors
dds_filtered <- estimateSizeFactors(dds_filtered)
# Get normalized counts
normalized_counts <- counts(dds_filtered, normalized=TRUE)
# After dds_filtered is created and before any downstream analyses:
# List of genes to exclude
genes_to_exclude <- c(
"zgc:112970",
"lectin",
"zp2.6",
"qdprb.4",
"bncr",
"avd",
"tdgf1",
"adad2",
"LOC137488852",
"ppt2a.1",
"snapc1a",
"exd1",
"LOC141381090",
"LOC141381093",
"LOC141378343",
"LOC101884348"
)
# Remove these genes from dds_filtered, rld, and normalized_counts
keep_genes <- !(rownames(dds_filtered) %in% genes_to_exclude)
dds_filtered <- dds_filtered[keep_genes, ]
# Re-transform counts for visualization after filtering
rld <- vst(dds_filtered, blind=FALSE)
# Estimate size factors again after filtering
dds_filtered <- estimateSizeFactors(dds_filtered)
# Get normalized counts after filtering
normalized_counts <- counts(dds_filtered, normalized=TRUE)
# Print exclusion summary
cat("Number of genes excluded:", length(genes_to_exclude), "\n")
## Number of genes excluded: 16
cat("Number of genes remaining after exclusion:", nrow(dds_filtered), "\n")
## Number of genes remaining after exclusion: 20657
Set up parallel processing
# Enhanced parallel processing setup
# Detect number of available cores
available_cores <- parallel::detectCores()
cat("Available cores:", available_cores, "\n")
## Available cores: 10
# Use 75% of available cores to avoid overwhelming the system
# Leave some cores free for other processes
num_cores <- max(1, floor(available_cores * 0.75))
cat("Using cores for parallel processing:", num_cores, "\n")
## Using cores for parallel processing: 7
# Create optimized BiocParallel parameters
bp_param <- MulticoreParam(
workers = num_cores,
tasks = num_cores * 2, # Increase task granularity
progressbar = TRUE # Enable progress reporting
)
# Register the optimized parameters
register(bp_param)
cat("Parallel processing configured with", num_cores, "cores\n")
## Parallel processing configured with 7 cores
cat("Task granularity:", bp_param$tasks, "\n")
## Task granularity: 14
cat("Progress bar enabled:", bp_param$progressbar, "\n")
## Progress bar enabled: TRUE
Define Analysis Comparisons
# Define the specific comparisons for our analysis
comparisons <- list(
# 1. All Treatment Groups (comprehensive analysis)
"All_Treatments" = list(
description = "Comprehensive analysis across all treatment combinations",
design = ~ Treatment,
contrasts = list(
"A_minus_T_minus_P_plus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_minus_T_minus_P_plus", "A_minus_T_minus_P_minus"),
"A_plus_T_minus_P_minus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_plus_T_minus_P_minus", "A_minus_T_minus_P_minus"),
"A_plus_T_minus_P_plus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_plus_T_minus_P_plus", "A_minus_T_minus_P_minus"),
"A_minus_T_plus_P_minus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_minus_T_plus_P_minus", "A_minus_T_minus_P_minus"),
"A_minus_T_plus_P_plus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_minus_T_plus_P_plus", "A_minus_T_minus_P_minus"),
"A_plus_T_plus_P_minus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_plus_T_plus_P_minus", "A_minus_T_minus_P_minus"),
"A_plus_T_plus_P_plus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_plus_T_plus_P_plus", "A_minus_T_minus_P_minus")
)
),
# 2. Parasite Effect (A_minus_T_minus_P_minus vs A_minus_T_minus_P_plus)
"Parasite_Effect" = list(
description = "Baseline parasite effect: A_minus_T_minus_P_minus vs A_minus_T_minus_P_plus",
design = ~ Treatment,
contrasts = list(
"A_minus_T_minus_P_plus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_minus_T_minus_P_plus", "A_minus_T_minus_P_minus")
)
),
# 3. Historical Contingency (A_minus_T_minus_P_plus vs A_plus_T_minus_P_plus, A_minus_T_plus_P_plus, A_plus_T_plus_P_plus)
"Historical_Contingency" = list(
description = "How prior stressors affect parasite response: A_minus_T_minus_P_plus vs (A_plus_T_minus_P_plus, A_minus_T_plus_P_plus, A_plus_T_plus_P_plus)",
design = ~ Treatment,
contrasts = list(
"A_plus_T_minus_P_plus_vs_A_minus_T_minus_P_plus" = c("Treatment", "A_plus_T_minus_P_plus", "A_minus_T_minus_P_plus"),
"A_minus_T_plus_P_plus_vs_A_minus_T_minus_P_plus" = c("Treatment", "A_minus_T_plus_P_plus", "A_minus_T_minus_P_plus"),
"A_plus_T_plus_P_plus_vs_A_minus_T_minus_P_plus" = c("Treatment", "A_plus_T_plus_P_plus", "A_minus_T_minus_P_plus")
)
),
# 4. Recovery Analysis (A_minus_T_minus_P_minus vs A_plus_T_minus_P_minus, A_minus_T_plus_P_minus, A_plus_T_plus_P_minus)
"Recovery_Analysis" = list(
description = "How prior stressors affect recovery: A_minus_T_minus_P_minus vs (A_plus_T_minus_P_minus, A_minus_T_plus_P_minus, A_plus_T_plus_P_minus)",
design = ~ Treatment,
contrasts = list(
"A_plus_T_minus_P_minus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_plus_T_minus_P_minus", "A_minus_T_minus_P_minus"),
"A_minus_T_plus_P_minus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_minus_T_plus_P_minus", "A_minus_T_minus_P_minus"),
"A_plus_T_plus_P_minus_vs_A_minus_T_minus_P_minus" = c("Treatment", "A_plus_T_plus_P_minus", "A_minus_T_minus_P_minus")
)
)
)
Code
(hide)
Quality Control
Sample Distance Heatmap

Principal Component Analysis
## using ntop=500 top features by variance

Manage Saved Results
# Check which analyses have saved results
cat("=== Checking for saved results ===\n")
## === Checking for saved results ===
for (analysis_name in names(comparisons)) {
if (results_exist(analysis_name)) {
cat("✓", analysis_name, "- results found\n")
} else {
cat("✗", analysis_name, "- no saved results\n")
}
}
## ✓ All_Treatments - results found
## ✓ Parasite_Effect - results found
## ✓ Historical_Contingency - results found
## ✓ Recovery_Analysis - results found
# Set this to TRUE if you want to recompute all analyses
# Set to FALSE to use existing results when available
force_recompute_all <- FALSE
# You can also set individual analyses to recompute
force_recompute_individual <- list(
"All_Treatments" = FALSE,
"Parasite_Effect" = FALSE,
"Historical_Contingency" = FALSE,
"Recovery_Analysis" = FALSE
)
cat("\nForce recompute all:", force_recompute_all, "\n")
##
## Force recompute all: FALSE
cat("Individual force recompute settings:\n")
## Individual force recompute settings:
for (analysis_name in names(force_recompute_individual)) {
cat(" ", analysis_name, ":", force_recompute_individual[[analysis_name]], "\n")
}
## All_Treatments : FALSE
## Parasite_Effect : FALSE
## Historical_Contingency : FALSE
## Recovery_Analysis : FALSE
# Uncomment the lines below if you want to clean up saved results
# cleanup_saved_results() # Remove all saved results
# cleanup_saved_results(c("All_Treatments", "Parasite_Effect")) # Remove specific analyses
cat("\n=== Usage Instructions ===\n")
##
## === Usage Instructions ===
cat("1. Set force_recompute_all = TRUE to recompute all analyses\n")
## 1. Set force_recompute_all = TRUE to recompute all analyses
cat("2. Set individual force_recompute_individual values to TRUE to recompute specific analyses\n")
## 2. Set individual force_recompute_individual values to TRUE to recompute specific analyses
cat("3. Use cleanup_saved_results() to remove all saved results\n")
## 3. Use cleanup_saved_results() to remove all saved results
cat("4. Use cleanup_saved_results(c('analysis1', 'analysis2')) to remove specific analyses\n")
## 4. Use cleanup_saved_results(c('analysis1', 'analysis2')) to remove specific analyses
Run DESeq2 Analyses
# Monitor initial performance
cat("=== Starting DESeq2 Analysis ===\n")
## === Starting DESeq2 Analysis ===
monitor_performance()
## === Performance Monitor ===
## Memory usage:
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7248456 387.2 11112538 593.5 NA 11112538 593.5
## Vcells 29991555 228.9 54056739 412.5 32768 54056739 412.5
## CPU info:
## CPU usage: 37.15% user, 15.20% sys, 47.63% idle
## Active parallel workers: 7
## ========================
# Optimize memory before starting
optimize_memory()
## Optimizing memory usage...
## Memory optimization complete
# Run all analyses
all_results <- list()
for (analysis_name in names(comparisons)) {
cat("\n--- Starting analysis:", analysis_name, "---\n")
# Monitor performance before each analysis
monitor_performance()
# Run the analysis
all_results[[analysis_name]] <- run_analysis_set(analysis_name, comparisons[[analysis_name]], force_recompute_all || force_recompute_individual[[analysis_name]])
# Optimize memory after each analysis
optimize_memory()
cat("--- Completed analysis:", analysis_name, "---\n")
}
##
## --- Starting analysis: All_Treatments ---
## === Performance Monitor ===
## Memory usage:
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7248618 387.2 11112538 593.5 NA 11112538 593.5
## Vcells 29992406 228.9 54056739 412.5 32768 54056739 412.5
## CPU info:
## CPU usage: 35.8% user, 14.66% sys, 50.24% idle
## Active parallel workers: 7
## ========================
##
## === Running All_Treatments ===
## Description: Comprehensive analysis across all treatment combinations
## Loading existing results for All_Treatments
## Loading existing results for: All_Treatments
## Optimizing memory usage...
## Memory optimization complete
## --- Completed analysis: All_Treatments ---
##
## --- Starting analysis: Parasite_Effect ---
## === Performance Monitor ===
## Memory usage:
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7249477 387.2 11112538 593.5 NA 11112538 593.5
## Vcells 31010273 236.6 54056739 412.5 32768 54056739 412.5
## CPU info:
## CPU usage: 35.67% user, 12.16% sys, 52.16% idle
## Active parallel workers: 7
## ========================
##
## === Running Parasite_Effect ===
## Description: Baseline parasite effect: A_minus_T_minus_P_minus vs A_minus_T_minus_P_plus
## Loading existing results for Parasite_Effect
## Loading existing results for: Parasite_Effect
## Optimizing memory usage...
## Memory optimization complete
## --- Completed analysis: Parasite_Effect ---
##
## --- Starting analysis: Historical_Contingency ---
## === Performance Monitor ===
## Memory usage:
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7249616 387.2 11112538 593.5 NA 11112538 593.5
## Vcells 31155745 237.7 54056739 412.5 32768 54056739 412.5
## CPU info:
## CPU usage: 40.50% user, 12.3% sys, 47.45% idle
## Active parallel workers: 7
## ========================
##
## === Running Historical_Contingency ===
## Description: How prior stressors affect parasite response: A_minus_T_minus_P_plus vs (A_plus_T_minus_P_plus, A_minus_T_plus_P_plus, A_plus_T_plus_P_plus)
## Loading existing results for Historical_Contingency
## Loading existing results for: Historical_Contingency
## Optimizing memory usage...
## Memory optimization complete
## --- Completed analysis: Historical_Contingency ---
##
## --- Starting analysis: Recovery_Analysis ---
## === Performance Monitor ===
## Memory usage:
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7249841 387.2 11112538 593.5 NA 11112538 593.5
## Vcells 31591152 241.1 54056739 412.5 32768 54056739 412.5
## CPU info:
## CPU usage: 37.11% user, 14.51% sys, 48.36% idle
## Active parallel workers: 7
## ========================
##
## === Running Recovery_Analysis ===
## Description: How prior stressors affect recovery: A_minus_T_minus_P_minus vs (A_plus_T_minus_P_minus, A_minus_T_plus_P_minus, A_plus_T_plus_P_minus)
## Loading existing results for Recovery_Analysis
## Loading existing results for: Recovery_Analysis
## Optimizing memory usage...
## Memory optimization complete
## --- Completed analysis: Recovery_Analysis ---
# Final performance check
cat("\n=== Final Performance Check ===\n")
##
## === Final Performance Check ===
monitor_performance()
## === Performance Monitor ===
## Memory usage:
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 7250003 387.2 11112538 593.5 NA 11112538 593.5
## Vcells 32026015 244.4 54056739 412.5 32768 54056739 412.5
## CPU info:
## CPU usage: 35.85% user, 13.92% sys, 50.21% idle
## Active parallel workers: 7
## ========================
# Print summary of results
cat("\n=== Analysis Summary ===\n")
##
## === Analysis Summary ===
for (analysis_name in names(all_results)) {
cat("\n", analysis_name, ":\n")
for (contrast_name in names(all_results[[analysis_name]])) {
res <- all_results[[analysis_name]][[contrast_name]]
cat(" ", contrast_name, ":\n")
cat(" Significant genes (padj < 0.05):", sum(res$padj < 0.05, na.rm=TRUE), "\n")
cat(" Upregulated genes:", sum(res$padj < 0.05 & res$log2FoldChange > 0, na.rm=TRUE), "\n")
cat(" Downregulated genes:", sum(res$padj < 0.05 & res$log2FoldChange < 0, na.rm=TRUE), "\n")
}
}
##
## All_Treatments :
## A_minus_T_minus_P_plus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 3822
## Upregulated genes: 2012
## Downregulated genes: 1810
## A_plus_T_minus_P_minus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 42
## Upregulated genes: 12
## Downregulated genes: 30
## A_plus_T_minus_P_plus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 6411
## Upregulated genes: 3426
## Downregulated genes: 2985
## A_minus_T_plus_P_minus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 66
## Upregulated genes: 13
## Downregulated genes: 53
## A_minus_T_plus_P_plus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 6144
## Upregulated genes: 3213
## Downregulated genes: 2931
## A_plus_T_plus_P_minus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 123
## Upregulated genes: 22
## Downregulated genes: 101
## A_plus_T_plus_P_plus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 4930
## Upregulated genes: 2574
## Downregulated genes: 2356
##
## Parasite_Effect :
## A_minus_T_minus_P_plus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 3822
## Upregulated genes: 2012
## Downregulated genes: 1810
##
## Historical_Contingency :
## A_plus_T_minus_P_plus_vs_A_minus_T_minus_P_plus :
## Significant genes (padj < 0.05): 55
## Upregulated genes: 33
## Downregulated genes: 22
## A_minus_T_plus_P_plus_vs_A_minus_T_minus_P_plus :
## Significant genes (padj < 0.05): 132
## Upregulated genes: 95
## Downregulated genes: 37
## A_plus_T_plus_P_plus_vs_A_minus_T_minus_P_plus :
## Significant genes (padj < 0.05): 22
## Upregulated genes: 6
## Downregulated genes: 16
##
## Recovery_Analysis :
## A_plus_T_minus_P_minus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 42
## Upregulated genes: 12
## Downregulated genes: 30
## A_minus_T_plus_P_minus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 66
## Upregulated genes: 13
## Downregulated genes: 53
## A_plus_T_plus_P_minus_vs_A_minus_T_minus_P_minus :
## Significant genes (padj < 0.05): 123
## Upregulated genes: 22
## Downregulated genes: 101